************* NOTE: pause Dropbox synching before running!
	/* right-click Dropbox icon, left-click bottom left of the window */

	
				
****************
* Monthly data *
****************
		
* Import factor data	
	// global
		foreach x in 3FF_global ///
		{
			clear all
			import excel "orig/`x'.xlsx", sheet("monthly") firstrow
			destring year month, replace
			gen time = ym(year, month) 
			drop year month 
			format time %tm
			save "data/`x'.dta", replace
		}
	// USA
		foreach x in 3FF_USA ///
		{
			clear all
			import excel "orig/`x'.xlsx", sheet("monthly") firstrow
			destring year month, replace
			gen time = ym(year, month) 
			drop year month
			format time %tm
			save "data/`x'.dta", replace
		}
			
* Import data from Excel to Stata and set time
	foreach x in stir ltir share_prices ///
	{
		clear all
		import excel "orig/OECD/`x'.xlsx", sheet("final_series_monthly") firstrow
		destring year month, replace
		gen time = ym(year, month) 
		format time %tm
		drop year_month date
		save "data/`x'.dta", replace
	}
		
* Merge data together
	clear all
	use "data/stir.dta"
	foreach x in ltir share_prices ///
	{
		merge m:m cou time using "data/`x'.dta", keep(match master)
		drop _merge
	}
	******** IMPORTANT: eliminate hyphens in country group names, otherwise the routines below get stuck
	replace cou = "G7" if cou == "G-7"
	replace cou = "G20" if cou == "G-20"
	******** also important: delete EA19 series
	drop if cou == "EA19"
		
* Eliminate country blocks from the original data set
	foreach x in EA19 EU27_2020 G20 G7 G7M OECD OECDE {
		drop if cou == "`x'"
	}

* Create return variables	
	// create stock returns
		***** NOTE: delete this variable if you create quarterly or annual data sets
		egen id = group(cou)
		xtset id time
		duplicates list id time
		tsset id time
		gen ret = ln(share_prices) - ln(l.share_prices)
	// express stir and ltir in decimals
		foreach x in stir ltir {
			gen `x'100 = `x' / 100
			drop `x'
			rename `x'100 `x'
		}
	// generate global returns (proxy for market returns, defined as average return across countries)
		egen glo_ret = mean(ret), by (time)
		egen glo_stir = mean(stir), by (time)
	// create excess returns
		gen ret_rf = ret - stir
		gen glo_ret_rf = glo_ret - glo_stir
		
* Estimate CAPM alphas and betas
	// for each stock, regress country-level excess returns on global excess returns
		bys id : asreg ret_rf glo_ret_rf, se wind(time 36)
			****** the option "se" yields standard errors to calculate t-stats
		drop _Nobs _R2 _adjR2 
		foreach x in glo_ret_rf cons {
		rename _b_`x' b_`x'
		rename _se_`x' se_`x'
		gen t_`x' = b_`x' / se_`x'
		}
		drop if b_cons==0
	// check for outliers
		tabstat b_glo_ret_rf b_cons, statistics(min p1 p5 p25 p50 p75 p95 p99 max n)
	// check t-stats
		tabstat t_glo_ret_rf t_cons, statistics(min p1 p5 p25 p50 p75 p95 p99 max n)
		drop se_* t_*
	// rename variables
		rename b_glo_ret_rf beta_capm
		rename b_cons alpha_capm
	// generate risk premium
		gen rp_capm = beta * glo_ret_rf		
				
* Estimate 3FF alphas and betas: global
	// merge with factor data
		foreach x in 3FF_global ///
		{
			merge m:m time using "data/`x'.dta", keep(match master)
			drop _merge
		}
		xtset id time
	// take logs and rename
		foreach x in mkt smb hml rf {
			gen ln_`x' = ln(1+`x'/100)
			drop `x'
			rename ln_`x' `x'_glo
		}
	// for each stock, regress country-level excess returns on global excess returns
		bys id : asreg ret_rf mkt_glo smb_glo hml_glo, se wind(time 36)
			****** the option "se" yields standard errors to calculate t-stats
		drop _Nobs _R2 _adjR2 
		foreach x in mkt_glo smb_glo hml_glo cons {
		rename _b_`x' b_`x'
		rename _se_`x' se_`x'
		gen t_`x' = b_`x' / se_`x'
		}
		drop if b_cons==0
	// check for outliers
		tabstat b_mkt_glo b_smb_glo b_hml_glo b_cons, statistics(min p1 p5 p25 p50 p75 p95 p99 max n)
	// check t-stats
		tabstat t_mkt_glo t_smb_glo t_hml_glo t_cons, statistics(min p1 p5 p25 p50 p75 p95 p99 max n)
		drop se_* t_*
	// rename variables
		rename b_cons alpha_glo
	// generate risk premium
		gen rp_glo = b_mkt_glo * mkt_glo + b_smb_glo * smb_glo + b_hml_glo * hml_glo
				
* Estimate 3FF alphas and betas: USA
	// merge with factor data
		foreach x in 3FF_USA ///
		{
			merge m:m time using "data/`x'.dta", keep(match master)
			drop _merge
		}
		xtset id time
	// take logs and rename
		foreach x in mkt smb hml rf {
			gen ln_`x' = ln(1+`x'/100)
			drop `x'
			rename ln_`x' `x'_usa
		}
	// for each stock, regress country-level excess returns on global excess returns
		bys id : asreg ret_rf mkt_usa smb_usa hml_usa, se wind(time 36)
			****** the option "se" yields standard errors to calculate t-stats
		drop _Nobs _R2 _adjR2 
		foreach x in mkt_usa smb_usa hml_usa cons {
		rename _b_`x' b_`x'
		rename _se_`x' se_`x'
		gen t_`x' = b_`x' / se_`x'
		}
		drop if b_cons==0
	// check for outliers
		tabstat b_mkt_usa b_smb_usa b_hml_usa b_cons, statistics(min p1 p5 p25 p50 p75 p95 p99 max n)
	// check t-stats
		tabstat t_mkt_usa t_smb_usa t_hml_usa t_cons, statistics(min p1 p5 p25 p50 p75 p95 p99 max n)
		drop se_* t_*
	// rename variables
		rename b_cons alpha_usa
	// generate risk premium
		gen rp_usa = b_mkt_usa * mkt_usa + b_smb_usa * smb_usa + b_hml_usa * hml_usa
				
* Create rolling mean and standard deviation of returns
	// generate additional return variables
		// moving averages and standard deviations
			sort id time
			foreach x in stir ltir ret {
				by id: asrol `x', stat(mean) window(time 12)
				by id: asrol `x', stat(sd) window(time 12)
			}
		// shorten variable names
			foreach x in stir ltir ret {
				la var mean12_`x' mean12_`x'
				rename mean12_`x' m_`x'
				la var sd12_`x' sd12_`x'
				rename sd12_`x' sd_`x'
			}	
	// save data
		save "data/returns_data_monthly.dta", replace
			
* Eliminate individual series
	foreach x in stir ltir share_prices 3FF_USA 3FF_global ///
	{
		erase "data/`x'.dta"
	}
	
		

***************
* Annual data *
***************

* Import data from Excel to Stata and set time
	// ipi, inflation, share prices, unemployment, household spending, labor productivity, total productivity
		foreach x in stir ltir ///
		{
			clear all
			import excel "orig/OECD/`x'.xlsx", sheet("final_series") firstrow
			destring year, replace
			save "data/`x'_a.dta", replace
		}
		
* Annual averages (of monthly data)
		clear all
		use "data/returns_data_monthly.dta"
	// for return variables, take *SUM* by country and year (intuition: these are monthly logs)
		foreach x in ret rp_capm alpha_capm rp_glo alpha_glo rp_usa alpha_usa ///
		{
			egen `x'_y = sum(`x'), by (cou year)
		}
	// for betas and stock prices, take average by country and year
		foreach x in beta_capm share_prices ///
		b_mkt_glo b_smb_glo b_hml_glo ///
		b_mkt_usa b_smb_usa b_hml_usa ///
		{
			egen `x'_y = mean(`x'), by (cou year)
		}
	// collapse by year
		collapse *_y, by(cou year)
	// rename series
		foreach x in beta_capm share_prices ///
		b_mkt_glo b_smb_glo b_hml_glo ///
		b_mkt_usa b_smb_usa b_hml_usa ///
		{
			rename `x'_y `x'
		}

* Annual series
	// merge
		foreach x in stir ltir ///
		{
			merge m:m cou year using "data/`x'_a.dta", keep(match master)
			drop _merge
		}
	// express bond rates (stir and ltir) in decimals	
		foreach x in stir ltir {
			gen `x'100 = `x' / 100
			drop `x'
			rename `x'100 `x'
		}		
			save "data/returns_data.dta", replace
	// delete individual series
		foreach x in stir ltir ///
		{
			erase "data/`x'_a.dta"
		}
